#!/usr/bin/env bash
#
# Launch train_grpo.py through torchrun.
#
# Usage:
#   [NPROC_PER_NODE=<n>] <this-script> [training-script-args...]
#
# Environment:
#   NPROC_PER_NODE  Value handed to torchrun's --nproc_per_node flag.
#                   Falls back to 8 when unset or empty, which reproduces the
#                   previously hard-coded invocation.
#
# Arguments:
#   Everything given on the command line is appended after train_grpo.py, so
#   it is passed on to the training script rather than parsed as a torchrun
#   option. With no arguments, nothing extra is appended.
#
# Notes:
#   train_grpo.py is given as a relative path, so run this from the directory
#   that contains it.
#   The exit status is that of torchrun (it is the last command executed).

torchrun --nproc_per_node="${NPROC_PER_NODE:-8}" train_grpo.py "$@"